/*******************************************************************************

PooledRegions analysis_FARMER.do

2018.08.30 (DJ) Created

*******************************************************************************/


			
/******************************************************************************
*******************************************************************************
	Farmer Level Market Access Input Adoption and Output Sales
*******************************************************************************
*******************************************************************************/	
// sampling weight
global sampweight numhh_pervillage
		
local indep1 std_google_vil_city_km
local indep2 DH_access_std

local j 0		
foreach filename in _weightedDIST _DHaccess{
	local ++j
	local indep `indep`j''
		
	/*******************************************************************************
		compile DEPENDENT variables
	*******************************************************************************/
		
		#d;
		local farmer_varlist "
			used_fert fert_qty_w5 used_seeds seeds_qty_w5 
			sell_maize sell_qty_w1 agent_came sell_agent sell_qty_agent_w5
			sell_market_16 sell_qty_mkt_16_w1
			ever_buy_maize buy_maize_typical_qty_w5
			buy_only_not_sell sell_only_not_buy buy_and_sell net_buyer net_seller
			"
		 ;
		 
		tokenize `farmer_varlist';
		local vars=wordcount("`farmer_varlist'");
		di `vars';
		
		 
		local farmer_controls 
		 "farmer_age_r farmer_female_r farmer_married_r farmer_years_education_r farmer_iron_roof_r farmer_not_mud_walls_r farmer_mud_floor_r 
		 farmer_bank_account_r farmer_mobile_money_r farmer_acres_land_r farmer_hh_size_r 
		 farmer_mobile_phone_r farmer_pc1_durables_r farmer_pc1_animals_r farmer_has_business_r farmer_total_income_USD_r";
		
		local farmer_missing
		 "farmer_age_m farmer_female_m farmer_married_m farmer_years_education_m farmer_iron_roof_m farmer_not_mud_walls_m farmer_mud_floor_m 
		 farmer_bank_account_m farmer_mobile_money_m farmer_acres_land_m farmer_hh_size_m 
		 farmer_mobile_phone_m farmer_pc1_durables_m farmer_pc1_animals_m farmer_has_business_m farmer_total_income_USD_m";
		
		local faocontrol "fao_yclr fao_ychr fao_sxlr fao_sxhr";
		
		#d cr
	
		
	
	/*******************************************************************************
		append Manyara and Kilimanjaro Farmer Data sets
	*******************************************************************************/
		
		// bring in trip cost to the best travel cost-adj price at the village level
	
			// Kilimanjaro
				use "${analysis}/Kilimanjaro ViltoAV_censusappended.dta", clear
				gen survey_region="KILIMANJARO"
				
				tempfile kili_vilaccess
				save `kili_vilaccess'
				
			// Manyara
				use "${analysis}/Manyara ViltoAV_censusappended.dta", clear
				rename (survey_district survey_ward survey_village) (district ward village_name)
				gen survey_region="MANYARA"
				append using `kili_vilaccess'
				
				keep district ward village_name survey_region travelcost_DR_adj_av_urea_usd min_DR_adj_price_50kg onewaycost_DR_adj_av_urea_usd
				tempfile tripcost
				save `tripcost'
				

				
		// append farmer-level data sets	
		use "${analysis}/Manyara Farmer_mktaccess.dta", clear
			drop secf*
			rename (survey_district survey_ward survey_village) (district ward village_name)
			
			// add placeholders for the variables that have not been calculated. 
			// This makes it easier for excel formatting of the regression tables.
			gen buy_only_not_sell15 = .
			gen sell_only_not_buy15 = .
			
			gen buy_only_not_sell = buy_only_not_sell17
			gen sell_only_not_buy = sell_only_not_buy17
			
			//keep survey_region district ward village_name ${sampweight} google_vil_* google_farmer* ///
			//	 `farmer_varlist' `farmer_controls' `farmer_missing' `faocontrol'
			
			//drop if district=="MBULU"|district=="HANANG"
			
			tempfile manyarafarmer
			save `manyarafarmer'
			
		
		use "${analysis}/Kilimanjaro Farmer_mktaccess.dta", clear
			// bring in the sampling weight
			merge m:1 village_name ward district using "${analysis}/Kilimanjaro Census.dta"
			keep if _merge==3
			drop _merge
			rename census_numhh_pervillage numhh_pervillage
			append using `manyarafarmer'
			
	
		// bring in tripcost to the best travel cost-adj villages
			merge m:1  survey_region district ward village_name using `tripcost'
	
		
			
		cap drop village_cluster_id villageunique
		egen village_cluster_id = group(district ward village_name)
		egen villageunique = tag(village_cluster_id)
	
		
		// generate MARKET ACCESS PROXIES
			gen DH_access = .
			foreach city in moshi arusha babati dodoma tanga{
				replace DH_access = 0 if !mi(google_vil_`city'_km)
			}
			foreach city in moshi arusha babati dodoma tanga{
				gen tau_`city' = 2.7*(0.9392 + 0.02019*google_vil_`city'_km)/25
				replace DH_access = DH_access + (1 + tau_`city')^(${elasticity})*pop_frac_`city'
			}
			egen DH_access_std = std(DH_access)
			replace DH_access_std = -DH_access_std
			
			gen l_google_vil_city_hrs 		=log(google_vil_city_hrs)
			gen l_google_vil_city_km		=log(google_vil_city_km)
			gen l_google_vil_nearcity_km	=log(google_vil_nearcity_km)
			
			egen std_google_vil_city_km			=std(google_vil_city_km)
			egen std_google_vil_nearcity_km		=std(google_vil_nearcity_km)
			egen std_google_vil_near_noB_km = std(google_vil_nearcity_noB_km)
				
	
	
	/*******************************************************************************
		run analysis
	*******************************************************************************/
	
	#d;
	pause off;
	local i 1;
	while `i' <= `vars' {;
		local append_replace="append";
		if `i'==1 {; local append_replace="replace";};
			 
		************************************************;
		*1. Weighted Regressions;
		************************************************;
	
		*A. no controls;
		*quietly reg ``i'' ${indvar`j'} ;
		xi: reg ``i'' `indep' [pweight=${sampweight}], cluster(village_cluster_id);
		
		mean ``i'' if e(sample) [pweight=${sampweight}];
		local mean=_b[``i''];
		sum ``i'' if e(sample);
		local sd=`r(sd)';
		
		xi: reg ``i'' `indep' [pweight=${sampweight}], cluster(village_cluster_id); // need to run this one more time because estimation is lost due to "mean" command
		quietly outreg2 using "${pool_results}/PooledRegions Farmer`filename'.xls", nonote se symb(***,**,*) `append_replace' dec(2)
		 addstat("#of villages", e(N_clust), "mean of depvar", `mean', "sd of depvar", `sd') keep(`indep');
		
		*B. All controls;
		xi: reg ``i'' `indep' `faocontrol' `farmer_controls' `farmer_missing' [pweight=${sampweight}], cluster(village_cluster_id);
		
		mean ``i'' if e(sample) [pweight=${sampweight}];
		local mean=_b[``i''];
		sum ``i'' if e(sample);
		local sd=`r(sd)';
		
		xi: reg ``i'' `indep' `faocontrol' `farmer_controls' `farmer_missing' [pweight=${sampweight}], cluster(village_cluster_id); // need to run this one more time because estimation is lost due to "mean" command
		quietly outreg2 using "${pool_results}/PooledRegions Farmer`filename'.xls", nonote se symb(***,**,*) append dec(2) nocons
		 addstat("#of villages", e(N_clust), "mean of depvar", `mean', "sd of depvar", `sd', "FAO controls + farmer controls", 1) 
		 keep(`indep' `faocontrol' `farmer_controls');
		 
		 
		local i = `i' + 1;
	};
	#d cr
		
}

	


/******************************************************************************
*******************************************************************************
	Farmer/Farm Correlates
*******************************************************************************
*******************************************************************************/
** macros
	
// sampling weight
global sampweight numhh_pervillage

local indep1 std_google_vil_city_km
local indep2 DH_access_std

local j 0		
foreach filename in _weightedDIST_Table3_20220302 _DHaccess_Table3_20220302{
	local ++j
	local indep `indep`j''
	
	/*******************************************************************************
		compile DEPENDENT variables
	*******************************************************************************/
		
		#d;
		local farmer_varlist "
		farmer_age
		farmer_female
		farmer_married
		farmer_hh_size
		farmer_years_education
		farmer_thatch_roof
		farmer_mobile_phone
		farmer_bank_account
		farmer_mobile_money
		farmer_acres_land
		farmer_has_business
		farmer_total_income_USD_w5
		fao_yclr_kg_acre
		fao_ychr_kg_acre
		fao_product_diff
		harvest_kg_w5
		harv_peracre_w5
		harv_value_total_USD_w5
		";
		 
		tokenize `farmer_varlist';
		local vars=wordcount("`farmer_varlist'");
		di `vars';
		#d cr
	
		
	
	/*******************************************************************************
		append Manyara and Kilimanjaro Farmer Data sets
	*******************************************************************************/
		
		use "${analysis}/Manyara Farmer_mktaccess.dta", clear
			drop secf*
			rename (survey_district survey_ward survey_village) (district ward village_name)
			tempfile manyarafarmer
			save `manyarafarmer'
		
		use "${analysis}/Kilimanjaro Farmer_mktaccess.dta", clear
			// bring in the sampling weight
			merge m:1 village_name ward district using "${analysis}/Kilimanjaro Census.dta"
			keep if _merge==3
			rename census_numhh_pervillage numhh_pervillage
			
			append using `manyarafarmer'
	
		cap drop village_cluster_id villageunique
		egen village_cluster_id = group(district ward village_name)
		egen villageunique = tag(village_cluster_id)
		
		
		// fix the total harvested value USD by using the median price for both regions
			cap drop harv_value_total_USD_w5
			sum sale_maize_P_120kg_16, d
			local maizeprice = r(p50)
			gen harv_value_total_USD_w5 = harvest_kg_w5*`maizeprice'/120/2300
		
		// the total harvested value USD by using the median price for each region
			sum sale_maize_P_120kg_16 if survey_region=="MANYARA", d
			local maizeprice_MAN = r(p50)
			sum sale_maize_P_120kg_16 if survey_region=="KILIMANJARO", d
			local maizeprice_KILI = r(p50)
			
			gen harv_value2_total_USD_w5 = harvest_kg_w5*`maizeprice_MAN'/120/2300 if survey_region=="MANYARA"
			replace harv_value2_total_USD_w5= harvest_kg_w5*`maizeprice_KILI'/120/2300 if survey_region=="KILIMANJARO"
		
		
		// log transformation
		//foreach var in harvest_kg_w5{
		//	replace `var' = ln(harvest_kg_w5)
		//}
		
		// generate MARKET ACCESS PROXIES
			gen DH_access = .
			foreach city in moshi arusha babati dodoma tanga{
				replace DH_access = 0 if !mi(google_vil_`city'_km)
			}
			foreach city in moshi arusha babati dodoma tanga{
				gen tau_`city' = 2.7*(0.9392 + 0.02019*google_vil_`city'_km)/25
				replace DH_access = DH_access + (1 + tau_`city')^(${elasticity})*pop_frac_`city'
			}
			egen DH_access_std = std(DH_access)
			replace DH_access_std = -DH_access_std
			
			egen std_google_vil_city_km	=std(google_vil_city_km)
			
		
	bys survey_region: fsum `farmer_varlist'
	bys survey_region: distinct village_cluster_id
	
	
	/*******************************************************************************
		run analysis
	*******************************************************************************/
	
	#d;
	pause off;
	local i 1;
	while `i' <= `vars' {;
		local append_replace="append";
		if `i'==1 {; local append_replace="replace";};
			 
		xi: reg ``i'' `indep' [pweight=${sampweight}], cluster(village_cluster_id);
		
		mean ``i'' if e(sample) [pweight=${sampweight}];
		local mean=_b[``i''];
		sum ``i'' if e(sample);
		local sd=`r(sd)';
		
		xi: reg ``i'' `indep' [pweight=${sampweight}], cluster(village_cluster_id) ; // need to run this one more time because estimation is lost due to "mean" command
		quietly outreg2 using "${pool_results}/PooledRegions Farmer`filename'.xls", nonote se symb(***,**,*) `append_replace' dec(2)
		 addstat("#of villages", e(N_clust), "mean of depvar", `mean', "sd of depvar", `sd') keep(`indep');
		
		
		local i = `i' + 1;
	};
	#d cr
}
		 
exit


/*



